from bs4 import BeautifulSoup
# 调用requests库获取网页
import requests

# Fetch the Baidu News front page and demonstrate basic BeautifulSoup
# navigation: tag access, attribute access, and find_all queries.
url = "https://news.baidu.com/"
# Send a desktop browser User-Agent so the server returns the normal
# HTML page instead of a bot/mobile variant.
ua = {
'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/90.0.4430.212 Safari/537.36'
}
# timeout is required: requests has NO default timeout, so without it a
# stalled connection would hang the script forever.
req = requests.get(url, headers=ua, timeout=10)
# print(req.content.decode('utf-8'))
# Decode the raw bytes explicitly as UTF-8 (the page's declared encoding).
html = req.content.decode('utf-8')
soup = BeautifulSoup(html, 'lxml')  # build the BeautifulSoup parse tree (lxml backend)
# print("输出格式化的BeautifulSoup对象：", soup.prettify())

# Get the <head> tag
# print("获取head标签: ", soup.head)

# Get the <title> tag
# print("获取title标签：", soup.title)

# Get the first <a> tag inside <body>
# print("获取第一个a标签：", soup.body.a)

# Get all <a> tags
# print("所有名称为a的标签：", soup.find_all('a'))

# NOTE: find_all returns a bs4.element.ResultSet (list-like), not a set
# print(type(soup.find_all('a')))
# Count of all tags named 'a'
# print("所有名称为a的标签的个数：", len(soup.find_all('a')))

# Get the name of the soup object
# print("soup的name：", soup.name)
# Get the name of an <a> tag
# print("a标签的name：", soup.a.name)

# soup.a is shorthand for the first <a> tag in the document.
tag = soup.a
# print("tag的name: ", tag.name)  # the tag's name
print("tag的内容：", tag)

# All attributes of the tag, as a dict
print("tag对象的全部属性：", tag.attrs)
# Value of the href attribute; Tag.get() returns None instead of raising
# KeyError when the first <a> happens to have no href attribute.
print("href属性的值：", tag.get('href'))
